{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {
        "collapsed": true,
        "pycharm": {
          "is_executing": false
        }
      },
      "outputs": [],
      "source": "import pandas as pd\nimport numpy as np\nimport seaborn as sns"
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "outputs": [],
      "source": "# Read Data \n# 因为jupyter服务配置，所以文件目录和项目文件目录存在差异，如果运行时请更改\ntrain \u003d pd.read_csv(\"./data/train.csv\",na_values\u003d\"\\\\N\")\ntest \u003d pd.read_csv(\"./data/test.csv\", na_values\u003d\"\\\\N\")\nlisting_info \u003d pd.read_csv(\"./data/listing_info.csv\")\nuser_info \u003d pd.read_csv(\"./data/user_info.csv\")\nuser_repay_logs \u003d pd.read_csv(\"./data/user_repay_logs.csv\")\nuser_taglist \u003d pd.read_csv(\"./data/user_taglist.csv\")",
      "metadata": {
        "pycharm": {
          "metadata": false,
          "name": "#%%\n",
          "is_executing": false
        }
      }
    },
    {
      "cell_type": "markdown",
      "source": "# 数据基本情况查看",
      "metadata": {
        "pycharm": {
          "metadata": false,
          "name": "#%% md\n"
        }
      }
    },
    {
      "cell_type": "code",
      "source": "train.info()",
      "metadata": {
        "pycharm": {
          "metadata": false,
          "name": "#%%\n",
          "is_executing": false
        }
      },
      "execution_count": 7,
      "outputs": [
        {
          "name": "stdout",
          "text": [
            "\u003cclass \u0027pandas.core.frame.DataFrame\u0027\u003e\nRangeIndex: 1000000 entries, 0 to 999999\nData columns (total 7 columns):\nuser_id          1000000 non-null int64\nlisting_id       1000000 non-null int64\nauditing_date    1000000 non-null object\ndue_date         1000000 non-null object\ndue_amt          1000000 non-null float64\nrepay_date       882808 non-null object\nrepay_amt        882808 non-null float64\ndtypes: float64(2), int64(2), object(3)\nmemory usage: 53.4+ MB\n"
          ],
          "output_type": "stream"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": "1. 共100万条记录，之后可以考虑按月查看分布，看是否存在特殊时间点，比如还款集中的时间段\n2. 882808条记录有还款日期（repay_date 非空），其余117192条没有还款记录，逾期率约11.7%。可以进一步考察逾期率随时间的变化",
      "metadata": {
        "pycharm": {
          "metadata": false,
          "name": "#%% md\n"
        }
      }
    },
    {
      "cell_type": "code",
      "source": "test.info()",
      "metadata": {
        "pycharm": {
          "metadata": false,
          "name": "#%% \n",
          "is_executing": false
        }
      },
      "execution_count": 8,
      "outputs": [
        {
          "name": "stdout",
          "text": [
            "\u003cclass \u0027pandas.core.frame.DataFrame\u0027\u003e\nRangeIndex: 130000 entries, 0 to 129999\nData columns (total 5 columns):\nuser_id          130000 non-null int64\nlisting_id       130000 non-null int64\nauditing_date    130000 non-null object\ndue_date         130000 non-null object\ndue_amt          130000 non-null float64\ndtypes: float64(1), int64(2), object(2)\nmemory usage: 5.0+ MB\n"
          ],
          "output_type": "stream"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": "1. 共13万条记录，2、3月份借款数量较少\n2. 待确认：测试集采用的是怎样的抽样逻辑？",
      "metadata": {
        "pycharm": {
          "metadata": false,
          "name": "#%% md\n"
        }
      }
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "outputs": [
        {
          "name": "stdout",
          "text": [
            "\u003cclass \u0027pandas.core.frame.DataFrame\u0027\u003e\nRangeIndex: 5484891 entries, 0 to 5484890\nData columns (total 6 columns):\nuser_id          int64\nlisting_id       int64\nauditing_date    object\nterm             int64\nrate             float64\nprincipal        int64\ndtypes: float64(1), int64(4), object(1)\nmemory usage: 251.1+ MB\n"
          ],
          "output_type": "stream"
        }
      ],
      "source": "listing_info.info()",
      "metadata": {
        "pycharm": {
          "metadata": false,
          "name": "#%%\n",
          "is_executing": false
        }
      }
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "outputs": [
        {
          "data": {
            "text/plain": "   user_id  listing_id auditing_date  term  rate  principal\n0   316610     1556649    2017-11-26     9   7.6       4800\n1    62002     1556633    2017-11-26     6   7.6       4000\n2   192135     1556629    2017-11-26    12   8.0       8660\n3   487382     1556628    2017-11-26     9   7.6       4780\n4   235186     1556627    2017-11-26     9   7.6       1480",
            "text/html": "\u003cdiv\u003e\n\u003cstyle scoped\u003e\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n\u003c/style\u003e\n\u003ctable border\u003d\"1\" class\u003d\"dataframe\"\u003e\n  \u003cthead\u003e\n    \u003ctr style\u003d\"text-align: right;\"\u003e\n      \u003cth\u003e\u003c/th\u003e\n      \u003cth\u003euser_id\u003c/th\u003e\n      \u003cth\u003elisting_id\u003c/th\u003e\n      \u003cth\u003eauditing_date\u003c/th\u003e\n      \u003cth\u003eterm\u003c/th\u003e\n      \u003cth\u003erate\u003c/th\u003e\n      \u003cth\u003eprincipal\u003c/th\u003e\n    \u003c/tr\u003e\n  \u003c/thead\u003e\n  \u003ctbody\u003e\n    \u003ctr\u003e\n      \u003cth\u003e0\u003c/th\u003e\n      \u003ctd\u003e316610\u003c/td\u003e\n      \u003ctd\u003e1556649\u003c/td\u003e\n      \u003ctd\u003e2017-11-26\u003c/td\u003e\n      \u003ctd\u003e9\u003c/td\u003e\n      \u003ctd\u003e7.6\u003c/td\u003e\n      \u003ctd\u003e4800\u003c/td\u003e\n    \u003c/tr\u003e\n    \u003ctr\u003e\n      \u003cth\u003e1\u003c/th\u003e\n      \u003ctd\u003e62002\u003c/td\u003e\n      \u003ctd\u003e1556633\u003c/td\u003e\n      \u003ctd\u003e2017-11-26\u003c/td\u003e\n      \u003ctd\u003e6\u003c/td\u003e\n      \u003ctd\u003e7.6\u003c/td\u003e\n      \u003ctd\u003e4000\u003c/td\u003e\n    \u003c/tr\u003e\n    \u003ctr\u003e\n      \u003cth\u003e2\u003c/th\u003e\n      \u003ctd\u003e192135\u003c/td\u003e\n      \u003ctd\u003e1556629\u003c/td\u003e\n      \u003ctd\u003e2017-11-26\u003c/td\u003e\n      \u003ctd\u003e12\u003c/td\u003e\n      \u003ctd\u003e8.0\u003c/td\u003e\n      \u003ctd\u003e8660\u003c/td\u003e\n    \u003c/tr\u003e\n    \u003ctr\u003e\n      \u003cth\u003e3\u003c/th\u003e\n      \u003ctd\u003e487382\u003c/td\u003e\n      \u003ctd\u003e1556628\u003c/td\u003e\n      
\u003ctd\u003e2017-11-26\u003c/td\u003e\n      \u003ctd\u003e9\u003c/td\u003e\n      \u003ctd\u003e7.6\u003c/td\u003e\n      \u003ctd\u003e4780\u003c/td\u003e\n    \u003c/tr\u003e\n    \u003ctr\u003e\n      \u003cth\u003e4\u003c/th\u003e\n      \u003ctd\u003e235186\u003c/td\u003e\n      \u003ctd\u003e1556627\u003c/td\u003e\n      \u003ctd\u003e2017-11-26\u003c/td\u003e\n      \u003ctd\u003e9\u003c/td\u003e\n      \u003ctd\u003e7.6\u003c/td\u003e\n      \u003ctd\u003e1480\u003c/td\u003e\n    \u003c/tr\u003e\n  \u003c/tbody\u003e\n\u003c/table\u003e\n\u003c/div\u003e"
          },
          "metadata": {},
          "output_type": "execute_result",
          "execution_count": 10
        }
      ],
      "source": "listing_info.head()",
      "metadata": {
        "pycharm": {
          "metadata": false,
          "name": "#%%\n",
          "is_executing": false
        }
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "outputs": [],
      "source": "",
      "metadata": {
        "pycharm": {
          "metadata": false,
          "name": "#%%\n",
          "is_executing": true
        }
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "outputs": [],
      "source": "\n",
      "metadata": {
        "pycharm": {
          "metadata": false,
          "name": "#%%\n"
        }
      }
    }
  ],
  "metadata": {
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 2
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython2",
      "version": "2.7.6"
    },
    "kernelspec": {
      "name": "python3",
      "language": "python",
      "display_name": "Python 3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}